{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "elegant-proxy",
   "metadata": {
    "papermill": {
     "duration": 0.011489,
     "end_time": "2021-03-30T21:54:42.895419",
     "exception": false,
     "start_time": "2021-03-30T21:54:42.883930",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Functional Approximations by Trees and Neural Networks\n",
    "\n",
    "Here we show how the function\n",
    "$$\n",
    "x \\mapsto \\exp(4 x)\n",
    "$$\n",
    "can be easily approximated by tree-based methods (a single tree, a random forest, boosted trees) and by a neural network with two hidden layers."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "widespread-mention",
   "metadata": {
    "papermill": {
     "duration": 0.009467,
     "end_time": "2021-03-30T21:54:42.915858",
     "exception": false,
     "start_time": "2021-03-30T21:54:42.906391",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Functional Approximation by a Tree\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "registered-correction",
   "metadata": {
    "papermill": {
     "duration": 0.694812,
     "end_time": "2021-03-30T21:54:43.620078",
     "exception": false,
     "start_time": "2021-03-30T21:54:42.925266",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "set.seed(1)\n",
    "X_train <- matrix(runif(1000),1000,1)\n",
    "Y_train <- exp(4*X_train)  #Noiseless case  Y=g(X)\n",
    "dim(X_train)\n",
    "\n",
    "library(rpart)\n",
    "\n",
    "# Collect the sample in a data frame with named columns so that both the fit\n",
    "# and the prediction read their variables from an explicit data argument.\n",
    "train_df <- data.frame(X_train = as.vector(X_train), Y_train = as.vector(Y_train))\n",
    "\n",
    "# shallow tree\n",
    "TreeModel <- rpart(Y_train ~ X_train, data = train_df, cp = .01)  # cp is the penalty level\n",
    "# predict() for rpart objects takes `newdata`; the previous `newx=` is not one of\n",
    "# its arguments, so the data to predict on was never actually handed over.\n",
    "pred.TM <- predict(TreeModel, newdata = train_df)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.TM, col=3, pch=19)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "banner-sleeve",
   "metadata": {
    "papermill": {
     "duration": 0.294088,
     "end_time": "2021-03-30T21:54:43.926159",
     "exception": false,
     "start_time": "2021-03-30T21:54:43.632071",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "set.seed(1)\n",
    "X_train <- matrix(runif(1000),1000,1)\n",
    "Y_train <- exp(4*X_train)  #Noiseless case  Y=g(X)\n",
    "dim(X_train)\n",
    "\n",
    "library(rpart)\n",
    "\n",
    "# Named-column data frame: one explicit data source for fitting and predicting.\n",
    "train_df <- data.frame(X_train = as.vector(X_train), Y_train = as.vector(Y_train))\n",
    "\n",
    "# deeper tree: a smaller cp (penalty level) than in the shallow fit\n",
    "TreeModel <- rpart(Y_train ~ X_train, data = train_df, cp = .0005)\n",
    "# predict() for rpart objects takes `newdata`; the previous `newx=` is not one of\n",
    "# its arguments, so the data to predict on was never actually handed over.\n",
    "pred.TM <- predict(TreeModel, newdata = train_df)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.TM, col=3, pch=19)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "local-saturn",
   "metadata": {
    "papermill": {
     "duration": 0.013444,
     "end_time": "2021-03-30T21:54:43.953303",
     "exception": false,
     "start_time": "2021-03-30T21:54:43.939859",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Functional Approximation by RF"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "international-serum",
   "metadata": {
    "papermill": {
     "duration": 0.01351,
     "end_time": "2021-03-30T21:54:43.980273",
     "exception": false,
     "start_time": "2021-03-30T21:54:43.966763",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Here we show how the function\n",
    "$$\n",
    "x \\mapsto \\exp(4 x)\n",
    "$$\n",
    "can be easily approximated by a tree-based method (random forest) and by a neural network with two hidden layers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "further-siemens",
   "metadata": {
    "papermill": {
     "duration": 1.170101,
     "end_time": "2021-03-30T21:54:45.163992",
     "exception": false,
     "start_time": "2021-03-30T21:54:43.993891",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "library(randomForest)\n",
    "\n",
    "# Named-column data frame so that randomForest() and predict() share one explicit\n",
    "# data source instead of resolving X_train / Y_train from the global environment.\n",
    "train_df <- data.frame(X_train = as.vector(X_train), Y_train = as.vector(Y_train))\n",
    "\n",
    "RFmodel <- randomForest(Y_train ~ X_train, data = train_df)\n",
    "# newdata carries a column named X_train, matching the formula used for the fit.\n",
    "pred.RF <- predict(RFmodel, newdata = train_df)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.RF, col=4, pch=19)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "infrared-belgium",
   "metadata": {
    "papermill": {
     "duration": 0.015474,
     "end_time": "2021-03-30T21:54:45.201078",
     "exception": false,
     "start_time": "2021-03-30T21:54:45.185604",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Boosted Trees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "naval-twenty",
   "metadata": {
    "papermill": {
     "duration": 1.40312,
     "end_time": "2021-03-30T21:54:46.619828",
     "exception": false,
     "start_time": "2021-03-30T21:54:45.216708",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "library(gbm)\n",
    "\n",
    "# Name the columns explicitly: as.data.frame(cbind(...)) of two unnamed matrices\n",
    "# yields columns V1/V2, which the formula Y_train ~ X_train cannot find in the data.\n",
    "data_train <- data.frame(X_train = as.vector(X_train), Y_train = as.vector(Y_train))\n",
    "\n",
    "# shrinkage is the \"learning rate\"; n.trees is the number of boosting steps\n",
    "BoostTreemodel <- gbm(Y_train ~ X_train, data = data_train, distribution = \"gaussian\",\n",
    "                      n.trees = 100, shrinkage = .01, interaction.depth = 4)\n",
    "pred.BT <- predict(BoostTreemodel, newdata = data_train, n.trees = 100)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.BT, col=4, pch=19)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "listed-michigan",
   "metadata": {
    "papermill": {
     "duration": 0.411341,
     "end_time": "2021-03-30T21:54:47.050501",
     "exception": false,
     "start_time": "2021-03-30T21:54:46.639160",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "library(gbm)\n",
    "\n",
    "# Name the columns explicitly: as.data.frame(cbind(...)) of two unnamed matrices\n",
    "# yields columns V1/V2, which the formula Y_train ~ X_train cannot find in the data.\n",
    "data_train <- data.frame(X_train = as.vector(X_train), Y_train = as.vector(Y_train))\n",
    "\n",
    "# Same settings as the 100-step fit, but with ten times as many boosting steps.\n",
    "# shrinkage is the \"learning rate\"; n.trees is the number of boosting steps\n",
    "BoostTreemodel <- gbm(Y_train ~ X_train, data = data_train, distribution = \"gaussian\",\n",
    "                      n.trees = 1000, shrinkage = .01, interaction.depth = 4)\n",
    "pred.BT <- predict(BoostTreemodel, newdata = data_train, n.trees = 1000)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.BT, col=4, pch=19)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "psychological-venice",
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "papermill": {
     "duration": 0.018291,
     "end_time": "2021-03-30T21:54:47.087924",
     "exception": false,
     "start_time": "2021-03-30T21:54:47.069633",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Same Example with a Neural Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "behind-redhead",
   "metadata": {
    "papermill": {
     "duration": 0.195046,
     "end_time": "2021-03-30T21:54:47.301402",
     "exception": false,
     "start_time": "2021-03-30T21:54:47.106356",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "library(keras)\n",
    "\n",
    "# Assemble and compile the feed-forward network used below:\n",
    "#   1 input -> dense layer of 200 ReLU units -> dense layer of 20 ReLU units\n",
    "#   -> dense layer with a single output unit (no activation specified).\n",
    "# Compiled with the Adam optimizer (lr = 0.01), mean squared error as the loss\n",
    "# and mean absolute error reported as a metric.\n",
    "# The function returns whatever the compile() call returns.\n",
    "build_model <- function() {\n",
    "  net <- keras_model_sequential()\n",
    "  net <- layer_dense(net, units = 200, activation = \"relu\", input_shape = 1)\n",
    "  net <- layer_dense(net, units = 20, activation = \"relu\")\n",
    "  net <- layer_dense(net, units = 1)\n",
    "\n",
    "  compile(\n",
    "    net,\n",
    "    optimizer = optimizer_adam(lr = 0.01),\n",
    "    loss = \"mse\",\n",
    "    metrics = c(\"mae\")\n",
    "  )\n",
    "}\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "modified-monitor",
   "metadata": {
    "papermill": {
     "duration": 7.599438,
     "end_time": "2021-03-30T21:54:54.919929",
     "exception": false,
     "start_time": "2021-03-30T21:54:47.320491",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Create the network with build_model() and display its summary.\n",
    "model <- build_model()\n",
    "summary(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "early-savannah",
   "metadata": {
    "papermill": {
     "duration": 1.37161,
     "end_time": "2021-03-30T21:54:56.310872",
     "exception": false,
     "start_time": "2021-03-30T21:54:54.939262",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Train `model` for a single epoch (mini-batches of 10, silent), then overlay\n",
    "# its predictions on the training sample.\n",
    "num_epochs <- 1\n",
    "fit(model, X_train, Y_train, epochs = num_epochs, batch_size = 10, verbose = 0)\n",
    "pred.NN <- predict(model, X_train)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.NN, col=4, pch=19)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "answering-ready",
   "metadata": {
    "papermill": {
     "duration": 13.865941,
     "end_time": "2021-03-30T21:55:10.197721",
     "exception": false,
     "start_time": "2021-03-30T21:54:56.331780",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Run 100 more epochs on the existing `model` object. It is not rebuilt in this\n",
    "# cell, so these epochs come on top of any training it has already received.\n",
    "num_epochs <- 100\n",
    "fit(model, x = X_train, y = Y_train,\n",
    "    epochs = num_epochs, batch_size = 10, verbose = 0)\n",
    "pred.NN <- predict(model, X_train)\n",
    "plot(X_train, Y_train, type=\"p\", pch=19, xlab=\"z\", ylab=\"g(z)\")\n",
    "points(X_train, pred.NN, col=4, pch=19)\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.3"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 30.682213,
   "end_time": "2021-03-30T21:55:10.531019",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-03-30T21:54:39.848806",
   "version": "2.3.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
